<!DOCTYPE html>



  


<html class="theme-next gemini use-motion" lang="zh-CN">
<head><meta name="generator" content="Hexo 3.8.0">
  <meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#222">









<meta http-equiv="Cache-Control" content="no-transform">
<meta http-equiv="Cache-Control" content="no-siteapp">
















  
  
  <link href="/hcigmoid/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css">




  
  
  
  

  
    
    
  

  
    
      
    

    
  

  

  

  

  
    
    
    <link href="https://fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic&amp;subset=latin,latin-ext" rel="stylesheet" type="text/css">
  






<link href="/hcigmoid/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css">

<link href="/hcigmoid/css/main.css?v=5.1.4" rel="stylesheet" type="text/css">


  <link rel="apple-touch-icon" sizes="180x180" href="/hcigmoid/images/apple-touch-icon-next.png?v=5.1.4">


  <link rel="icon" type="image/png" sizes="32x32" href="/hcigmoid/images/favicon-32x32-next.png?v=5.1.4">


  <link rel="icon" type="image/png" sizes="16x16" href="/hcigmoid/images/favicon-16x16-next.png?v=5.1.4">


  <link rel="mask-icon" href="/hcigmoid/images/logo.svg?v=5.1.4" color="#222">





  <meta name="keywords" content="Hexo, NexT">





  <link rel="alternate" href="/hcigmoid/atom.xml" title="HCigmoid" type="application/atom+xml">






<meta name="description" content="总结心得">
<meta name="keywords" content="feature, model, algorithm">
<meta property="og:type" content="website">
<meta property="og:title" content="HCigmoid">
<meta property="og:url" content="http://guyuecanhui.gitee.io/hcigmoid/page/4/index.html">
<meta property="og:site_name" content="HCigmoid">
<meta property="og:description" content="总结心得">
<meta property="og:locale" content="zh_CN">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="HCigmoid">
<meta name="twitter:description" content="总结心得">



<script type="text/javascript" id="hexo.configurations">
  // Theme namespace: reuse window.NexT when it already exists, otherwise start empty.
  var NexT = window.NexT || {};

  // Page-level settings; declared with `var` at script top level, so it is a global.
  var CONFIG = {
    root: '/hcigmoid/',
    scheme: 'Gemini',
    version: '5.1.4',

    sidebar: {
      position: 'left',
      display: 'post',
      offset: 12,
      b2t: false,
      scrollpercent: false,
      onmobile: false
    },

    fancybox: true,
    tabs: true,

    motion: {
      enable: true,
      async: false,
      transition: {
        post_block: 'fadeIn',
        post_header: 'slideDownIn',
        post_body: 'slideDownIn',
        coll_header: 'slideLeftIn',
        sidebar: 'slideUpIn'
      }
    },

    duoshuo: { userId: '0', author: '博主' },

    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: { per_page: 10 },
      // The ${...} markers are plain text inside ordinary strings, not template literals.
      labels: {
        input_placeholder: 'Search for Posts',
        hits_empty: "We didn't find any results for the search: ${query}",
        hits_stats: '${hits} results found in ${time} ms'
      }
    }
  };
</script>



  <link rel="canonical" href="http://guyuecanhui.gitee.io/hcigmoid/page/4/">





  <title>HCigmoid</title>
  








</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-CN">

  
  
    
  

  <div class="container sidebar-position-left 
  page-home">
    <div class="headband"></div>

    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-wrapper">
  <div class="site-meta ">
    

    <div class="custom-logo-site-title">
      <a href="/hcigmoid/" class="brand" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">HCigmoid</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
      
        <p class="site-subtitle">Watch, learn and practise</p>
      
  </div>

  <div class="site-nav-toggle">
    <!-- Icon-only control: explicit type avoids the implicit "submit" default,
         and aria-label gives the button an accessible name. -->
    <button type="button" aria-label="切换导航栏">
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
    </button>
  </div>
</div>

<nav class="site-nav">
  

  
    <!-- Primary site menu. The Font Awesome <i> glyphs are decorative (each link
         already carries a text label), so they are hidden from assistive tech. -->
    <ul id="menu" class="menu">
        <li class="menu-item menu-item-home">
          <a href="/hcigmoid/" rel="section">
              <i class="menu-item-icon fa fa-fw fa-home" aria-hidden="true"></i> <br>
            首页
          </a>
        </li>
        <li class="menu-item menu-item-about">
          <a href="/hcigmoid/about/" rel="section">
              <i class="menu-item-icon fa fa-fw fa-user" aria-hidden="true"></i> <br>
            关于
          </a>
        </li>
        <li class="menu-item menu-item-tags">
          <a href="/hcigmoid/tags/" rel="section">
              <i class="menu-item-icon fa fa-fw fa-tags" aria-hidden="true"></i> <br>
            标签
          </a>
        </li>
        <li class="menu-item menu-item-categories">
          <a href="/hcigmoid/categories/" rel="section">
              <i class="menu-item-icon fa fa-fw fa-th" aria-hidden="true"></i> <br>
            分类
          </a>
        </li>
        <li class="menu-item menu-item-archives">
          <a href="/hcigmoid/archives/" rel="section">
              <i class="menu-item-icon fa fa-fw fa-archive" aria-hidden="true"></i> <br>
            归档
          </a>
        </li>
        <li class="menu-item menu-item-search">
            <!-- NOTE(review): this anchor has no real destination (href="javascript:;");
                 presumably a script bound to .popup-trigger opens the search popup.
                 Converting it to <button type="button"> needs matching theme CSS/JS
                 changes, so the element is left unchanged here. -->
            <a href="javascript:;" class="popup-trigger">
              <i class="menu-item-icon fa fa-search fa-fw" aria-hidden="true"></i> <br>
            搜索
          </a>
        </li>
    </ul>
  

  
    <div class="site-search">
      <!-- Local search popup: header (icon, close control, query input) plus a results container. -->
      <div class="popup search-popup local-search-popup">
        <div class="local-search-header clearfix">
          <span class="search-icon">
            <i class="fa fa-search" aria-hidden="true"></i>
          </span>
          <span class="popup-btn-close">
            <i class="fa fa-times-circle" aria-hidden="true"></i>
          </span>
          <div class="local-search-input-wrapper">
            <!-- A placeholder is not a label; aria-label gives the field an accessible name. -->
            <input autocomplete="off" placeholder="搜索..." spellcheck="false" type="text" id="local-search-input" aria-label="搜索">
          </div>
        </div>
        <div id="local-search-result"></div>
      </div>
    </div>
  
</nav>



 </div>
    </header>

    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          <div id="content" class="content">
            
  <section id="posts" class="posts-expand">
    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/07/28/feature-selection-spearman%202/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/07/28/feature-selection-spearman%202/" itemprop="url">常用的特征选择方法之 Spearman 秩相关系数</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-07-28T10:36:03+08:00">
                2019-07-28
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/特征工程/" itemprop="url" rel="index">
                    <span itemprop="name">特征工程</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/07/28/feature-selection-spearman%202/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/07/28/feature-selection-spearman 2/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  1.6k
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  6
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <p><a href="https://guyuecanhui.github.io/2019/07/20/feature-selection-pearson/" target="_blank" rel="noopener">上一篇</a>里，我们简单的介绍了基于 <strong>Pearson</strong> 相关系数的特征选择方法，本篇介绍另一种使用更加广泛的相关系数：<strong>Spearman</strong> 秩相关系数，简称 <strong>Spearman</strong> 相关系数。<strong>Spearman</strong> 相关系数与 <strong>Pearson</strong> 相关系数、<strong>Kendall</strong> 相关系数并称统计学三大相关系数，足见其重要性。</p>
<p>有了 <strong>Pearson</strong> 相关系数，为什么还要用 <strong>Spearman</strong> 相关系数呢，主要是 <strong>Pearson</strong> 系数只能度量两个服从正态分布的变量之间线性相关性的强弱 (如果不熟悉可以回顾一下上一篇的介绍)，而 <strong>Spearman</strong> 系数只度量<strong>单调关系</strong>，而不考虑具体数值的影响，因此 <strong>Spearman</strong> 相关系数的应用范围更广，不仅对数据分布不作任何假设，能够容忍异常值，也不需要数据的取值是等距的（例如比赛中，第 1 名和第 2 名的距离与第 2 名和第 3 名的距离是不等的），因此除非是考虑性能的影响，能用 <strong>Pearson</strong> 系数的地方都能用 <strong>Spearman</strong> 系数。</p>
<h3 id="Spearman-秩相关系数的定义"><a href="#Spearman-秩相关系数的定义" class="headerlink" title="Spearman 秩相关系数的定义"></a>Spearman 秩相关系数的定义</h3><p><a href="https://blog.csdn.net/liuyuan_jq/article/details/52542211" target="_blank" rel="noopener"><strong>Spearman</strong> 秩相关系数</a>是一个非参数性质（与分布无关）的秩统计参数，是用来度量两个<strong>连续型变量</strong>之间<strong>单调关系</strong>强弱的相关系数，取值范围也是 $[-1,1]$。在没有重复数据的情况下，如果一个变量是另外一个变量的严格单调函数，则 <strong>Spearman</strong> 秩相关系数就是 $1$ 或 $-1$，称变量完全 <strong>Spearman</strong> 秩相关。</p>
<p>这里的秩相关 (<strong>Rank Correlation</strong>)，又称等级相关，是将两变量的样本值按数据的大小顺序排列位次，以各要素样本值的位次代替实际数据而求得的一种统计量。排序不论从大到小还是从小到大排都无所谓，只要保证大家排序的标准一致即可。</p>
<p>用 $\rho_s$ 来表示 <strong>Spearman</strong> 相关系数 (用 $\rho_p$ 表示 <strong>Pearson</strong> 相关系数)。如果每个变量都没有相同的取值 (即没有相同的秩次)，则 <strong>Spearman</strong> 相关系数可由下式计算：</p>
<script type="math/tex; mode=display">
\rho_s=1-\frac{6\sum{d_i^2}}{n(n^2-1)}</script><p>其中，$n$ 表示数据点的个数；<script type="math/tex">d_i</script> 表示数据点 <script type="math/tex">(x_i,y_i)</script> 的秩次 <script type="math/tex">(r_{x_i},r_{y_i})</script> 之差：<script type="math/tex">d_i=r_{x_i}-r_{y_i}</script>。</p>
<p>如果某个变量有重复数据，则计算变量之间的 <strong>Spearman</strong> 相关系数就是计算变量数据秩次之间的 <strong>Pearson</strong> 相关系数：</p>
<script type="math/tex; mode=display">
\rho_s=\rho_{r_x,r_y}=\frac{\text{cov}(r_x,r_y)}{\sigma_{r_x}\sigma_{r_y}}</script><p>其中，$r_x$ 表示变量 $\boldsymbol{x}$ 转换后的秩次。从这个定义可以看出来，<strong>Spearman</strong> 相关系数实际上就是对数据做了秩次变换后的 <strong>Pearson</strong> 相关系数。</p>
<h3 id="举例说明"><a href="#举例说明" class="headerlink" title="举例说明"></a>举例说明</h3><p>我们还是拿上一篇的例子来说明。首先将样本进行秩次变换，样本升序排列后的位次如图 1 所示：</p>

<p>需要说明的是，这里变量 $y$ 有两个重复数据 $0.1$，在排序的时候它们的位次相同，此时可以用相同位次的数据所占的位次之和除以数据的数量 (即 $\frac{1+2}{2}=1.5$) 来作为这些重复数据的位次。</p>
<p>根据定义，当存在重复数据的时候，我们计算秩次 (即 $r_x$, $r_y$) 的 <strong>Pearson</strong> 相关系数 (过程省略)，得到结果 $\rho_s=0.994$，几乎是单调相关了，其数值比直接计算原始数据的 <strong>Pearson</strong> 相关系数 $\rho_p=0.972$ 还要大一些。</p>
<p>实际上，当 <strong>Pearson</strong> 相关系数比较大的时候，<strong>Spearman</strong> 相关系数也比较大；而当 <strong>Pearson</strong> 相关系数比较小的时候，<strong>Spearman</strong> 相关系数仍然可能较大，例如变量之间是指数相关 ($y=e^x$，如图 2 所示) 时，它们的 <strong>Pearson</strong> 相关系数和 <strong>Spearman</strong> 相关系数分别是 $0.7758$ 和 $1.0$。</p>

<p>最后，我们看看<a href="https://guyuecanhui.github.io/2019/07/20/feature-selection-pearson/" target="_blank" rel="noopener">上一篇图 3</a> 所示的异常数据对 <strong>Spearman</strong> 相关系数的影响，引入异常点 $(0.9,-1.0)$ 后，变量 $x$, $y$ 的 <strong>Pearson</strong> 相关系数降为了 $\rho_p=-0.0556$，它们的 <strong>Spearman</strong> 相关系数也受到了较大的影响，降到了 $\rho_s=0.3234$，也就是较弱的正相关性。但是从这个例子仍然可以看出，与 <strong>Pearson</strong> 相关系数相比，<strong>Spearman</strong> 相关系数对异常值容忍度更高一些。</p>
<blockquote>
<p>附示例的 python 代码：</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br></pre></td><td class="code"><pre><span class="line"><span class="meta">&gt;&gt;&gt; </span><span class="keyword">from</span> scipy.stats <span class="keyword">import</span> spearmanr, pearsonr</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>x=[<span class="number">0.1</span>, <span class="number">0.2</span>, <span class="number">0.3</span>, <span class="number">0.4</span>, <span class="number">0.6</span>, <span class="number">0.7</span>, <span class="number">0.8</span>, <span class="number">0.9</span>]</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>y=[<span class="number">0.1</span>, <span class="number">0.1</span>, <span class="number">0.2</span>, <span class="number">0.6</span>, <span class="number">0.7</span>, <span class="number">0.8</span>, <span class="number">0.9</span>, <span class="number">1.0</span>]</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>spearmanr(x,y)</span><br><span class="line">(<span class="number">0.99402979738800479</span>, <span class="number">5.2961535156451228e-07</span>)</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>rx=[<span class="number">1</span>, <span class="number">2</span>, <span class="number">3</span>, <span class="number">4</span>, <span class="number">5</span>, <span class="number">6</span>, <span class="number">7</span>, <span class="number">8</span>]</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>ry=[<span class="number">1.5</span>, <span class="number">1.5</span>, <span class="number">3</span>, <span class="number">4</span>, <span class="number">5</span>, <span class="number">6</span>, <span class="number">7</span>, <span class="number">8</span>]</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>pearsonr(rx,ry)</span><br><span class="line">(<span class="number">0.99402979738800501</span>, <span class="number">5.2961535156445373e-07</span>)</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>z=[<span class="number">0.1</span>, <span class="number">0.1</span>, <span class="number">0.2</span>, <span class="number">0.6</span>, <span class="number">0.7</span>, <span class="number">0.8</span>, <span class="number">0.9</span>, <span class="number">-1.0</span>]</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>spearmanr(x,z)</span><br><span class="line">(<span class="number">0.32335909071657992</span>, <span class="number">0.43463944855085729</span>)</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>z=[<span class="number">0.1</span>, <span class="number">0.12</span>, <span class="number">0.2</span>, <span class="number">0.6</span>, <span class="number">0.7</span>, <span class="number">0.8</span>, <span class="number">0.9</span>, <span class="number">-1.0</span>]</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>spearmanr(x,z)</span><br><span class="line">(<span class="number">0.32335909071657992</span>, <span class="number">0.43463944855085729</span>)</span><br></pre></td></tr></table></figure>
<p>这里，<code>spearmanr</code> 返回的第二个结果是 p-value，其具体含义可参考<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html" target="_blank" rel="noopener">官方文档</a>。</p>
</blockquote>
<h3 id="Take-aways"><a href="#Take-aways" class="headerlink" title="Take-aways"></a>Take-aways</h3><p>本文简单介绍了 Spearman 相关系数，主要注意点总结如下：</p>
<ol>
<li><strong>Spearman</strong> 相关系数是度量两个<strong>连续型变量</strong>之间<strong>单调关系</strong>强弱的相关系数，它对数据的分布不作任何假设，能够容忍异常值，也不需要数据的取值是等距的；</li>
<li><strong>Spearman</strong> 相关系数实际上就是对数据做了秩次变换后的 <strong>Pearson</strong> 相关系数，只要能用 <strong>Pearson</strong> 相关系数的地方就能使用 <strong>Spearman</strong> 相关系数；</li>
<li><strong>Spearman</strong> 相关系数还需要对原始数据进行排序，因此计算复杂度高于 <strong>Pearson</strong> 相关系数，当数据满足 <strong>Pearson</strong> 相关系数的使用条件时，优先考虑使用 <strong>Pearson</strong> 相关系数。</li>
</ol>
<hr>
<blockquote>
<h4 id="这是特征选择系列文章的第二篇，其他文章可参考："><a href="#这是特征选择系列文章的第二篇，其他文章可参考：" class="headerlink" title="这是特征选择系列文章的第二篇，其他文章可参考："></a>这是特征选择系列文章的第二篇，其他文章可参考：</h4><ol>
<li><a href="https://guyuecanhui.github.io/2019/07/20/feature-selection-pearson/" target="_blank" rel="noopener">常用的特征选择方法之 Pearson 相关系数</a></li>
<li><a href="https://guyuecanhui.github.io/2019/07/28/feature-selection-spearman/" target="_blank" rel="noopener">常用的特征选择方法之 Spearman 相关系数</a></li>
<li><a href="https://guyuecanhui.github.io/2019/07/28/feature-selection-kendall/" target="_blank" rel="noopener">常用的特征选择方法之 Kendall 秩相关系数</a></li>
</ol>
</blockquote>

          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/07/28/feature-selection-spearman/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/07/28/feature-selection-spearman/" itemprop="url">常用的特征选择方法之 Spearman 秩相关系数</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-07-28T10:36:03+08:00">
                2019-07-28
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/特征工程/" itemprop="url" rel="index">
                    <span itemprop="name">特征工程</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/07/28/feature-selection-spearman/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/07/28/feature-selection-spearman/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  1.6k
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  6
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          <p><a href="https://guyuecanhui.github.io/2019/07/20/feature-selection-pearson/" target="_blank" rel="noopener">上一篇</a>里，我们简单的介绍了基于 <strong>Pearson</strong> 相关系数的特征选择方法，本篇介绍另一种使用更加广泛的相关系数：<strong>Spearman</strong> 秩相关系数，简称 <strong>Spearman</strong> 相关系数。<strong>Spearman</strong> 相关系数与 <strong>Pearson</strong> 相关系数、<strong>Kendall</strong> 相关系数并称统计学三大相关系数，足见其重要性。</p>
<p>有了 <strong>Pearson</strong> 相关系数，为什么还要用 <strong>Spearman</strong> 相关系数呢，主要是 <strong>Pearson</strong> 系数只能度量两个服从正态分布的变量之间线性相关性的强弱 (如果不熟悉可以回顾一下上一篇的介绍)，而 <strong>Spearman</strong> 系数只度量<strong>单调关系</strong>，而不考虑具体数值的影响，因此 <strong>Spearman</strong> 相关系数的应用范围更广，不仅对数据分布不作任何假设，能够容忍异常值，也不需要数据的取值是等距的（例如比赛中，第 1 名和第 2 名的距离与第 2 名和第 3 名的距离是不等的），因此除非是考虑性能的影响，能用 <strong>Pearson</strong> 系数的地方都能用 <strong>Spearman</strong> 系数。</p>
          <!--noindex-->
          <div class="post-button text-center">
            <a class="btn" href="/hcigmoid/2019/07/28/feature-selection-spearman/#more" rel="contents">
              阅读全文 &raquo;
            </a>
          </div>
          <!--/noindex-->
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/07/20/feature-selection-pearson%202/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/07/20/feature-selection-pearson%202/" itemprop="url">常用的特征选择方法之 Pearson 相关系数</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-07-20T22:36:03+08:00">
                2019-07-20
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/特征工程/" itemprop="url" rel="index">
                    <span itemprop="name">特征工程</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/07/20/feature-selection-pearson%202/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/07/20/feature-selection-pearson 2/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  2.3k
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  8
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <p>众所周知，特征选择是机器学习活动至关重要的一步。最理想的情况下，我们把所有影响目标的独立因素给找出来，然后使用合适的量化手段，就能够得到完美描述目标问题的特征列表，用这些特征去建立合适容量的模型，这样的模型能够完美的匹配我们要解决的任务。</p>
<p>但是实际上这种想法太难实现了，我们往往只能从已有的数据出发，通过一些特征变换和组合得到一些原始特征，然后从这些原始特征中选出与目标相关的特征。</p>
<p>随着深度网络的崛起，越来越多的未经复杂变换的原始特征被加入到了深度网络中，大家期待有用的特征能够被自动地抽取和组合出来。但是这并不意味着特征工程就不需要了，Google 的 Martin Zinkevich 在技术博客《Rules of Machine Learning: Best Practices for ML Engineering》中提到很多关于特征工程的建议，非常值得一读，其中包含的思想就是特征是随着系统的优化进程而逐步添加的，并非一蹴而就，要始终保证特征的简单、直观、可复用、可监控和可靠性，这意味着我们需要时常对系统中存量特征做测试和筛选。</p>
<p>特征选择通常有过滤法（Filter）、打包法（Wrap）和嵌入法（Embed），其中，后两者都是与模型相关的，需要具体问题具体对待，而过滤法是指对特征进行预处理，提前过滤掉一些对目标无益（即对模型无益）的特征，它只考虑任务目标，而与模型无关。</p>
<p>我打算把常用的特征选择方法都再回顾一遍，力争把每种方法都讲得通俗易懂。这篇文章先介绍 <strong>Pearson</strong> 相关系数。</p>
<h3 id="Pearson-相关系数的定义"><a href="#Pearson-相关系数的定义" class="headerlink" title="Pearson 相关系数的定义"></a>Pearson 相关系数的定义</h3><p><strong>Pearson</strong> 相关系数是用来检测两个<strong>连续型变量</strong>之间<strong>线性相关</strong>的程度，取值范围为 $[-1,1]$，正值表示正相关，负值表示负相关，绝对值越大表示线性相关程度越高。在实际做特征工程时候，如果两个变量的相关系数取值为负，可以将特征变量取负号，使之与目标变量正相关，这样来保证所有特征与目标之间都是正相关。</p>
<p>两个变量之间的 <strong>Pearson</strong> 相关系数定义为两个变量的协方差与它们标准差乘积的商：</p>
<script type="math/tex; mode=display">
\rho_{\boldsymbol{x},\boldsymbol{y}}=\frac{\text{cov}(\boldsymbol{x},\boldsymbol{y})}{\sigma_{\boldsymbol{x}}\sigma_{\boldsymbol{y}}}=\frac{E[(\boldsymbol{x}-\mu_{\boldsymbol{x}})(\boldsymbol{y}-\mu_{\boldsymbol{y}})]}{\sigma_{\boldsymbol{x}}\sigma_{\boldsymbol{y}}} \qquad(1)</script><p>上式定义了<strong>总体</strong>相关系数，常用希腊小写字母 $\rho$ 作为代表符号。估算样本的协方差和标准差，可得到<strong>样本 Pearson 相关系数</strong>，用英文小写字母 $r$ 表示：</p>
<script type="math/tex; mode=display">
r_{\boldsymbol{x},\boldsymbol{y}}=\frac{\sum ^n _{i=1}(x_i - \overline{x})(y_i - \overline{y})}{\sqrt{\sum ^n _{i=1}(x_i - \overline{x})^2} \sqrt{\sum ^n _{i=1}(y_i - \overline{y})^2}} \qquad(2)</script><p>记 $\boldsymbol{x}'=\boldsymbol{x}-\overline{x}$ 和 $\boldsymbol{y}'=\boldsymbol{y}-\overline{y}$ 表示对变量 $\boldsymbol{x}$ 和 $\boldsymbol{y}$ 进行 $0$ 均值化，则实际上 $\boldsymbol{x}$ 和 $\boldsymbol{y}$ 的 <strong>Pearson</strong> 相关系数就是 $\boldsymbol{x}'$ 和 $\boldsymbol{y}'$ 的 <strong>cosine</strong> 相似度：$r_{\boldsymbol{x},\boldsymbol{y}}=\cos(\boldsymbol{x}',\boldsymbol{y}')=\frac{\boldsymbol{x}'\cdot\boldsymbol{y}'}{|\boldsymbol{x}'|\cdot|\boldsymbol{y}'|}$。</p>
<h3 id="Pearson-相关系数的使用条件"><a href="#Pearson-相关系数的使用条件" class="headerlink" title="Pearson 相关系数的使用条件"></a>Pearson 相关系数的使用条件</h3><p>使用 <strong>Pearson</strong> 相关系数之前需要检查数据是否满足前置条件：</p>
<ol>
<li>两个变量间有线性关系；</li>
<li>变量是连续变量；</li>
<li>变量均符合正态分布，且二元分布也符合正态分布；</li>
<li>各对观测值之间相互独立；</li>
<li>两变量的方差不为 0；</li>
</ol>
<p>这些条件在实际中很容易被忽略。</p>
<p>例如，在视频推荐中，我们可以将用户对视频的播放完成度作为目标变量，检测其他连续型特征与它的相关性，或者将这些连续型特征做特定的变换后，检测其与播放完成度的相关性。</p>
<p>但是播放完成度实际上不是正态分布的，如下图所示（实际上大多数日志统计特征，如用户播放视频数、视频播放完成度等，也都不服从正态分布），因此实际上是不能使用 <strong>Pearson</strong> 相关系数的，这时候可以用 <strong>Spearman</strong> 或者 <strong>Kendall</strong> 相关系数来代替。</p>

<p>另外要注意的是，如果两个变量本身就是线性的关系，那么 <strong>Pearson</strong> 相关系数绝对值越大相关性越强，绝对值越小相关性越弱；但在两个变量关系未知的情况下，<strong>Pearson</strong> 相关系数的大小就没有什么指导意义了，它的绝对值大小并不能表征变量间的相关性强弱，这个时候最好能够画图出来看看作为辅助判断。我会在下面的例子里再详细地说明这一点。</p>
<h3 id="举例说明"><a href="#举例说明" class="headerlink" title="举例说明"></a>举例说明</h3><p>我们举个例子来看如何计算 <strong>Pearson</strong> 相关系数（这里仅仅演示计算过程，实际上数据的分布也不满足使用 <strong>Pearson</strong> 相关系数的条件）。</p>
<p>考虑视频推荐场景下，假设我们的目标 (之一) 是最大化视频的播放完成度 $y$，播放完成度的取值范围是 $[0,1]$，我们需要分析哪些因素跟 $y$ 相关，例如有一维特征是表示用户对视频的偏好度，记为 $x$，它的取值范围也是 $[0,1]$，我们把几条样本中 $x$ 和 $y$ 的取值计算出来，并画成散点图，如下所示：</p>

<p>我们可以按照公式 (2) 来计算 $x$ 与 $y$ 的 <strong>Pearson</strong> 相关系数：</p>
<ol>
<li>计算变量平均值：$\overline{x} = 0.5,\ \overline{y}=0.55$；</li>
<li>计算平移后的变量：$\boldsymbol{x}'=[-0.4,-0.3,-0.2,-0.1,0.1,0.2,0.3,0.4]$，$\boldsymbol{y}'=[-0.45,-0.45,-0.35,0.05,0.15,0.25,0.35,0.45]$；</li>
<li>计算公式 (2) 的结果：$r=\frac{0.73}{\sqrt{0.6}\cdot\sqrt{ 0.94}}=0.972$； </li>
</ol>
<p>通过计算，我们发现，这个特征与目标变量之间的线性相关性非常高，这与我们看图得到的认知是一致的。因此我们可以把这一维特征作为有效特征加入。</p>
<p>但是，如果我们对这个例子稍加修改，将最后一个数据点 $(0.9,1.0)$ 改为 $(0.9,-1.0)$，如图 3 所示：</p>

<p>从我们的观察来看，最后一个数据点可能是噪声或者异常值，对我们判断两个变量的线性相关性应该不造成影响，但是实际上，我们再次计算一下这两个变量的 <strong>Pearson</strong> 相关系数，此时的值仅仅只有 $-0.0556$，可以说是几乎不线性相关了，这说明 <strong>Pearson</strong> 相关系数小并不代表线性相关性一定弱。在这种情况下，我们应该在数据清洗阶段把特征的异常值过滤或者平滑掉以后，再计算它与目标的相关系数。</p>
<p>反过来，<strong>Pearson</strong> 相关系数大也并不代表线性相关性一定强。<a href="https://en.wikiversity.org/wiki/Correlation" target="_blank" rel="noopener">图 4</a> 列举了几个 <strong>Pearson</strong> 相关系数均为 $0.816$ 的变量数据，其中有些变量间并非明显的线性相关，或者是明显的二次相关，只是 <strong>Pearson</strong> 相关系数恰好较大而已。</p>

<blockquote>
<p>附示例的 python 代码：</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="meta">&gt;&gt;&gt; </span><span class="keyword">from</span> scipy.stats <span class="keyword">import</span> pearsonr</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>x = [<span class="number">0.1</span>, <span class="number">0.2</span>, <span class="number">0.3</span>, <span class="number">0.4</span>, <span class="number">0.6</span>, <span class="number">0.7</span>, <span class="number">0.8</span>, <span class="number">0.9</span>]</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>y = [<span class="number">0.1</span>, <span class="number">0.1</span>, <span class="number">0.2</span>, <span class="number">0.6</span>, <span class="number">0.7</span>, <span class="number">0.8</span>, <span class="number">0.9</span>, <span class="number">1.0</span>]</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>pearsonr(x, y)</span><br><span class="line">(<span class="number">0.97203814535663591</span>, <span class="number">5.3516208203873684e-05</span>)</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>z = [<span class="number">0.1</span>, <span class="number">0.1</span>, <span class="number">0.2</span>, <span class="number">0.6</span>, <span class="number">0.7</span>, <span class="number">0.8</span>, <span class="number">0.9</span>, <span class="number">-1.0</span>]</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>pearsonr(x, z)</span><br><span class="line">(<span class="number">-0.055618651039326214</span>, <span class="number">0.89592989552025337</span>)</span><br></pre></td></tr></table></figure>
<p>这里，<code>pearsonr</code> 返回的第二个结果是 p-value，其具体含义可参考<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html" target="_blank" rel="noopener">官方文档</a>。</p>
</blockquote>
<h3 id="Take-aways"><a href="#Take-aways" class="headerlink" title="Take-aways"></a>Take-aways</h3><p>本文简单的介绍了基于 <strong>Pearson</strong> 相关系数的特征选择方法，主要注意点总结如下：</p>
<ol>
<li><strong>Pearson</strong> 相关系数是用来检测两个<strong>连续型变量</strong>之间<strong>线性相关</strong>的程度，并且要求这两个变量分别服从正态分布；</li>
<li><strong>Pearson</strong> 相关系数仅能度量变量间的线性相关性，如果变量间相关性未知，则 <strong>Pearson</strong> 相关系数的大小没有指导意义，此时需要借助可视化手段辅助判断；</li>
<li>两变量的 <strong>Pearson</strong> 相关系数实际上是这两个变量 $0$ 均值化后的 <strong>cosine</strong> 相似度；</li>
<li>如果两个变量是非线性相关，为了使用线性模型，可以先将特征变量进行非线性变换，使之与目标线性相关；</li>
<li><strong>Pearson</strong> 相关系数对异常值比较敏感，在数据清洗阶段需要将异常值过滤或者平滑处理。</li>
</ol>
<hr>
<blockquote>
<h4 id="这是特征选择系列文章的第一篇，其他文章可参考："><a href="#这是特征选择系列文章的第一篇，其他文章可参考：" class="headerlink" title="这是特征选择系列文章的第一篇，其他文章可参考："></a>这是特征选择系列文章的第一篇，其他文章可参考：</h4><ol>
<li><a href="https://guyuecanhui.github.io/2019/07/20/feature-selection-pearson/" target="_blank" rel="noopener">常用的特征选择方法之 Pearson 相关系数</a></li>
<li><a href="https://guyuecanhui.github.io/2019/07/28/feature-selection-spearman/" target="_blank" rel="noopener">常用的特征选择方法之 Spearman 相关系数</a></li>
<li><a href="https://guyuecanhui.github.io/2019/07/28/feature-selection-kendall/" target="_blank" rel="noopener">常用的特征选择方法之 Kendall 秩相关系数</a></li>
</ol>
</blockquote>

          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/07/20/feature-selection-pearson/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/07/20/feature-selection-pearson/" itemprop="url">常用的特征选择方法之 Pearson 相关系数</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-07-20T22:36:03+08:00">
                2019-07-20
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/特征工程/" itemprop="url" rel="index">
                    <span itemprop="name">特征工程</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/07/20/feature-selection-pearson/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/07/20/feature-selection-pearson/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  2.3k
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  8
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          <p>众所周知，特征选择是机器学习活动至关重要的一步。最理想的情况下，我们把所有影响目标的独立因素给找出来，然后使用合适的量化手段，就能够得到完美描述目标问题的特征列表，用这些特征去建立合适容量的模型，这样的模型能够完美的匹配我们要解决的任务。</p>
<p>但是实际上这种想法太难实现了，我们往往只能从已有的数据出发，通过一些特征变换和组合得到一些原始特征，然后从这些原始特征中选出与目标相关的特征。</p>
<p>随着深度网络的崛起，越来越多的未经复杂变换的原始特征被加入到了深度网络中，大家期待有用的特征能够被自动地抽取和组合出来。但是这并不意味着特征工程就不需要了，Google 的 Martin Zinkevich 在技术文档《Rules of Machine Learning: Best Practices for ML Engineering》中提到很多关于特征工程的建议，非常值得一读，其中包含的思想就是特征是随着系统的优化进程而逐步添加的，并非一蹴而就，要始终保证特征的简单、直观、可复用、可监控和可靠性，这意味着我们需要时常对系统中存量特征做测试和筛选。</p>
<p>特征选择通常有过滤法（Filter）、包装法（Wrapper）和嵌入法（Embedded），其中，后两者都是与模型相关的，需要具体问题具体对待，而过滤法是指对特征进行预处理，提前过滤掉一些对目标无益（即对模型无益）的特征，它只考虑任务目标，而与模型无关。</p>
          <!--noindex-->
          <div class="post-button text-center">
            <a class="btn" href="/hcigmoid/2019/07/20/feature-selection-pearson/#more" rel="contents">
              阅读全文 &raquo;
            </a>
          </div>
          <!--/noindex-->
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/07/03/ftrl-fm/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/07/03/ftrl-fm/" itemprop="url">用 FTRL 训练 FM 模型</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-07-03T22:45:49+08:00">
                2019-07-03
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/推荐系统/" itemprop="url" rel="index">
                    <span itemprop="name">推荐系统</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/07/03/ftrl-fm/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/07/03/ftrl-fm/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  1.6k
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  7
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          <p>近期尝试了基于 <strong>FTRL</strong> 来训练 <strong>FM</strong> 模型，用于短视频的排序。这篇博客主要总结一下算法的理论推导和工程化的一些心得。</p>
          <!--noindex-->
          <div class="post-button text-center">
            <a class="btn" href="/hcigmoid/2019/07/03/ftrl-fm/#more" rel="contents">
              阅读全文 &raquo;
            </a>
          </div>
          <!--/noindex-->
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/07/03/ftrl-fm%202/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/07/03/ftrl-fm 2/" itemprop="url">用 FTRL 训练 FM 模型</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-07-03T22:45:49+08:00">
                2019-07-03
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/推荐系统/" itemprop="url" rel="index">
                    <span itemprop="name">推荐系统</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/07/03/ftrl-fm 2/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/07/03/ftrl-fm 2/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  1.6k
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  7
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <p>近期尝试了基于 <strong>FTRL</strong> 来训练 <strong>FM</strong> 模型，用于短视频的排序。这篇博客主要总结一下算法的理论推导和工程化的一些心得。</p>
<h2 id="一、FM-Factorization-Machines-模型推导"><a href="#一、FM-Factorization-Machines-模型推导" class="headerlink" title="一、FM (Factorization Machines) 模型推导"></a>一、FM (Factorization Machines) 模型推导</h2><h3 id="FM-模型简介"><a href="#FM-模型简介" class="headerlink" title="FM 模型简介"></a>FM 模型简介</h3><p>在设计排序模型时，至关重要的步骤就是特征的构造和选择。除了一些简单单特征外，往往要对特征进行组合，例如对用户的年龄、性别组合，对视频的演员、类别进行组合等，更大的特征空间能够增加模型表征能力。对于特征组合来说，业界现在通用的做法主要有两大类：</p>
<ul>
<li><strong>FM</strong> 系列，常见的模型包括 <strong>FM</strong>，<strong>FFM</strong>，<strong>DeepFM</strong>，它们对特征的取值范围比较敏感。</li>
<li><strong>Tree</strong> 系列，常见的模型包括 <strong>GBDT</strong>，它们对特征的取值范围不敏感。</li>
</ul>
<p>其中，<strong>FM</strong> 系列由于适合处理大规模稀疏数据，并且易于与深度神经网络结合，因此使用十分广泛，成为大厂居家必备。</p>
<p><strong>FM</strong> 模型的主要思想是在 <strong>LR</strong> 的基础上，对所有的特征自动做两两组合$^{[1,2]}$。两两组合最直观的方法就是为每对特征组合设置一个参数（例如 <strong>Poly2</strong> 模型），但是这样就需要 $\text{O}(n^2)$ 个参数，当特征数量很多时，需要的样本量也是巨大的，往往不可能所有的参数都有充足的样本训练。因此 <strong>FM</strong> 考虑使用矩阵分解的方式来还原这个 $n\times n$ 的参数矩阵，只需要 $n\times d$ （$d$ 通常是个很小的常数）个参数即可实现特征两两组合的目的。 </p>
<p>具体来说，给定样本 $z=(\boldsymbol{x},y)$，记 $\boldsymbol{v}_i = (v_i^{(1)},\cdots,v_i^{(d)})^\top$ 为第 $i$ 维特征对应的隐式向量，则 <strong>FM</strong> 模型为：</p>
<script type="math/tex; mode=display">
\begin{align}
f(\boldsymbol{x}|\boldsymbol{w})&=w_0+\sum_{i=1}^n w_ix_i+\sum_{i=1}^{n}\sum_{j=i+1}^{n} (\boldsymbol{v}_i^\top \boldsymbol{v}_j)x_ix_j \\
&=w_0+\sum_{i=1}^n w_ix_i+\frac{1}{2}\Big(\sum_{i=1}^{n}\sum_{j=1}^{n}\sum_{k=1}^{d} v_i^{(k)} v_j^{(k)}x_ix_j- \sum_{i=1}^{n}\sum_{k=1}^{d} (v_i^{(k)}x_i)^2\Big) \\
&=w_0+\sum_{i=1}^n w_ix_i+\frac{1}{2}\sum_{k=1}^{d}\Big(\sum_{i=1}^{n}v_i^{(k)} x_i\sum_{j=1}^{n} v_j^{(k)} x_j- \sum_{i=1}^{n} (v_i^{(k)}x_i)^2\Big) \\
&=w_0+\sum_{i=1}^n w_ix_i+\frac{1}{2}\sum_{k=1}^{d}\Big(\big(\sum_{i=1}^{n}v_i^{(k)} x_i \big)^2- \sum_{i=1}^{n} (v_i^{(k)}x_i)^2\Big)
\end{align} \qquad (1)</script><p><strong>FM</strong> 的参数包括 $\boldsymbol{w}=\{w_0,\cdots,w_n,v_1^{(1)},\cdots,v_n^{(d)}\}$，容易得到 <strong>FM</strong> 对各参数的偏导如下：</p>
<script type="math/tex; mode=display">
\frac{\partial f(\boldsymbol{x}|\boldsymbol{w})}{\partial w}=
\begin{cases}
\begin{align}
1 &, \qquad w=w_0 \\
x_i &, \qquad w=w_i,\ i=1,\cdots,n \\
x_i\Big(\sum_{j=1}^n v_j^{(k)}x_j - v_i^{(k)}x_i\Big) &, \qquad w=v_i^{(k)},\ i=1,\cdots,n;\ k=1,\cdots,d
\end{align}
\end{cases} \qquad (2)</script><h3 id="FM-模型求解（回归问题）"><a href="#FM-模型求解（回归问题）" class="headerlink" title="FM 模型求解（回归问题）"></a>FM 模型求解（回归问题）</h3><p>此时直接将 $\hat{y} = f(\boldsymbol{x}|\boldsymbol{w})$ 作为对 $y$ 的预测结果，因此可以将样本 $z=(\boldsymbol{x},y)$ 的损失函数定义为：</p>
<script type="math/tex; mode=display">
l(\boldsymbol{w},z) = \big(\hat{y}-y\big)^2= \big(f(\boldsymbol{x}|\boldsymbol{w})-y\big)^2 \qquad(3)</script><p>损失函数对参数的偏导为：</p>
<script type="math/tex; mode=display">
\begin{align}
\frac{\partial l(\boldsymbol{w},z)}{\partial w} 
&= 2\big(\hat{y}-y\big)\cdot \frac{\partial f(\boldsymbol{x}|\boldsymbol{w})}{\partial w} \\
&= 2 \big(f(\boldsymbol{x}|\boldsymbol{w})-y\big)\cdot \frac{\partial f(\boldsymbol{x}|\boldsymbol{w})}{\partial w}
\end{align}\qquad(4)</script><h3 id="FM-模型求解（二分类问题）"><a href="#FM-模型求解（二分类问题）" class="headerlink" title="FM 模型求解（二分类问题）"></a>FM 模型求解（二分类问题）</h3><p>此时将 $\hat{y} = \pi(f(\boldsymbol{x}|\boldsymbol{w}))=\frac{1}{1+e^{-f(\boldsymbol{x}|\boldsymbol{w})}}$  作为对 $y$ 的预测结果，其中，$\pi(x)$ 为 <strong>Sigmoid</strong> 函数。还是分标签取值来进行讨论（损失函数的推导参考 <strong><a href="https://guyuecanhui.github.io/2019/05/15/lr/" target="_blank" rel="noopener">LR 模型</a></strong>）。</p>
<h4 id="1-Label-为-1-0"><a href="#1-Label-为-1-0" class="headerlink" title="1. Label 为 {1,0}"></a>1. Label 为 {1,0}</h4><p>则将样本 $z=(\boldsymbol{x},y)$ 的损失函数定义为 <strong>LogLoss</strong> 函数：</p>
<script type="math/tex; mode=display">
l(\boldsymbol{w},z) = -yf(\boldsymbol{x}|\boldsymbol{w})+\ln\big(1+e^{f(\boldsymbol{x}|\boldsymbol{w})}\big)\qquad(5)</script><p>损失函数对参数的偏导为：</p>
<script type="math/tex; mode=display">
\begin{align}
\frac{\partial l(\boldsymbol{w},z)}{\partial w} &= -y\cdot\frac{\partial f(\boldsymbol{x}|\boldsymbol{w})}{\partial w}+\frac{1}{1+e^{f(\boldsymbol{x}|\boldsymbol{w})}}\cdot e^{f(\boldsymbol{x}|\boldsymbol{w})} \cdot\frac{\partial f(\boldsymbol{x}|\boldsymbol{w})}{\partial w} \\
&=\big(\pi(f(\boldsymbol{x}|\boldsymbol{w}))-y\big)\cdot \frac{\partial f(\boldsymbol{x}|\boldsymbol{w})}{\partial w} \\
&=\big(\hat{y}-y\big)\cdot \frac{\partial f(\boldsymbol{x}|\boldsymbol{w})}{\partial w}
\end{align} \qquad (6)</script><h4 id="2-Label-为-1-1"><a href="#2-Label-为-1-1" class="headerlink" title="2. Label 为 {1,-1}"></a>2. Label 为 {1,-1}</h4><p>则将样本 $z=(\boldsymbol{x},y)$ 的损失函数定义为 <strong>SigmoidLoss</strong> 函数：</p>
<script type="math/tex; mode=display">
l(\boldsymbol{w},z) = \ln\big(1+e^{-yf(\boldsymbol{x}|\boldsymbol{w})}\big)\qquad(7)</script><p>损失函数对参数的偏导为：</p>
<script type="math/tex; mode=display">
\begin{align}
\frac{\partial l(\boldsymbol{w},z)}{\partial w} 
&= \frac{1}{1+e^{-yf(\boldsymbol{x}|\boldsymbol{w})}}\cdot e^{-yf(\boldsymbol{x}|\boldsymbol{w})} \cdot(-y)\cdot\frac{\partial f(\boldsymbol{x}|\boldsymbol{w})}{\partial w} \\
&=y\cdot\big(\frac{1}{1+e^{-yf(\boldsymbol{x}|\boldsymbol{w})}}-1\big)\cdot \frac{\partial f(\boldsymbol{x}|\boldsymbol{w})}{\partial w} \\
&=y\cdot\Big(\pi\big(yf(\boldsymbol{x}|\boldsymbol{w})\big)-1\Big)\cdot \frac{\partial f(\boldsymbol{x}|\boldsymbol{w})}{\partial w}
\end{align} \qquad (8)</script><h2 id="二、FTRL-Optimizer-介绍"><a href="#二、FTRL-Optimizer-介绍" class="headerlink" title="二、FTRL Optimizer 介绍"></a>二、FTRL Optimizer 介绍</h2><p>上面一坨公式实际上是优化算法求梯度的时候用到的。优化算法目前有很多种，在诸如在线更新模型或者在线排序等对性能有严格要求的场景中，模型的稀疏解十分关键。稀疏的模型意味着只保留最关键特征的参数，意味着更少的存储、查询与计算。为了得到模型的稀疏解，通常的做法是使用 <strong>L1</strong> 正则、基于参数大小或者累积梯度大小的截断等技术。其中，<strong>FTRL</strong> 集众家之长，实现了精度与稀疏性的平衡$^{[3]}$。</p>
<p><strong>FTRL</strong> 更像是一种启发式的模型组装，其特征权重的更新公式为：</p>
<script type="math/tex; mode=display">
\boldsymbol{w}^{t+1}=\arg \min_{\boldsymbol{w}}(\boldsymbol{g}^{1:t}\cdot \boldsymbol{w}+\lambda_1 || \boldsymbol{w}||_1+\frac{1}{2}\lambda_2 || \boldsymbol{w}||_2^2 +\frac{1}{2}\sum_{j=1}^t\sigma^j || \boldsymbol{w}-\boldsymbol{w}^j ||_2^2) \qquad(9)</script><p>其中，$\boldsymbol{g}^{1:t}$ 表示 $1\sim t$ 轮迭代中参数梯度的累积和；<strong>L1</strong> 正则化部分是为了生成稀疏解，<strong>L2</strong> 正则化部分是为了使解更平滑（在论文的推导中不包含这一项），而 $\parallel \boldsymbol{w}-\boldsymbol{w}^j\parallel^2_2$ 是为了保证 $\boldsymbol{w}$ 不要离已迭代过的解太远。经过比较复杂的推导（参考文献 [4]），可以得到每一维参数的求解式：</p>
<script type="math/tex; mode=display">
w^{t+1}_i=
\begin{cases}
\begin{align}
0  &, \qquad |z^t_i|<\lambda_1 \\
-\Big(\lambda_2+\frac{\beta+\sqrt{s^t_i}}{\alpha}\Big)^{-1}\cdot\big(z^t_i-\lambda_1\cdot \text{sgn}(z^t_i)\big) &, \qquad \text{otherwise}
\end{align}
\end{cases} \qquad(10)</script><p>其中，令 <script type="math/tex">z_i^t=g_i^{1:t}-\sum_{j=1}^t\sigma^j w_i^j, \ s_i^t=\sum_{j=1}^t (g_i^j)^2</script>，主要是方便存储和迭代计算。</p>
<h2 id="三、基于-FTRL-训练-FM-的算法流程"><a href="#三、基于-FTRL-训练-FM-的算法流程" class="headerlink" title="三、基于 FTRL 训练 FM 的算法流程"></a>三、基于 FTRL 训练 FM 的算法流程</h2><p>用 <strong>FTRL</strong> 来训练 <strong>FM</strong> 模型，由于我们组习惯用 {0,1} 作为样本标签，则根据式 (2), (6), (10)，可以得到如下算法流程：</p>
<h3 id="Algorithm-Ftrl-FM"><a href="#Algorithm-Ftrl-FM" class="headerlink" title="Algorithm Ftrl+FM"></a><strong>Algorithm</strong> Ftrl+FM</h3>
<p><strong>FTRL</strong> 算法特别适合在线更新模型，即基于每条实时样本更新模型。但是出于性能和可靠性考虑，也可以稍加修改应用于离线训练或者近线批量训练。例如，离线训练任务，将每天/每小时的数据作为一个批次用来更新 <strong>FM</strong> 模型：在每轮迭代时，需要将一批次样本的所有梯度、损失等计算结果进行汇总（可以简单的用平均值来代替），再用汇总后的值更新模型。为了训练充分，可以对每个批次的样本迭代训练若干轮。训练完的模型需要将参数 $\boldsymbol{w}, \boldsymbol{s}, \boldsymbol{z}$ 保存起来，下次加载后再增量更新；而在线预测时，只需要加载参数 $\boldsymbol{w}$ 即可。</p>
<p>另外，由于短视频的标签、UP 主等特征变化较快，因此对于离散特征的编码可以考虑使用特征 Hash，虽然牺牲了一定的可解释性，并且存在一定的编码冲突，但是实测下来效果还是不错的，并且工程上确实能省下很多麻烦，提升不少性能。</p>
<p>最后，虽然 <strong>FM</strong> 模型具备表征特征两两组合的能力，但是实际上我们发现由于样本、调参等的限制，并不能充分发掘每对特征组合的作用，并且该模型对于三个及以上特征的组合就完全无能为力了。因此，实际应用时还是不能太依赖模型的自动特征组合能力，如果有什么对业务比较有帮助的特征，还是人工生成，再一起丢到模型里去训练吧。</p>
<h2 id="参考文献"><a href="#参考文献" class="headerlink" title="参考文献"></a>参考文献</h2><p>[1] Rendle, S. (2011). Factorization Machines. <em>IEEE International Conference on Data Mining</em>.<br>[2] Rendle, S. (2012). Factorization machines with libfm. <em>Acm Transactions on Intelligent Systems &amp; Technology, 3</em>(3), 1-22.<br>[3] McMahan, H. B., Holt, G., Sculley, D., Young, M., Ebner, D., Grady, J., … &amp; Chikkerur, S. (2013, August). Ad click prediction: a view from the trenches. In <em>Proceedings of the 19th ACM SIGKDD international conference on Knowledge discovery and data mining</em> (pp. 1222-1230). ACM.<br>[4] 冯扬 (2014). 在线最优化求解.</p>

          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/05/15/lr%202/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/05/15/lr 2/" itemprop="url">二项 Logistic Regression 模型</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-05-15T22:02:00+08:00">
                2019-05-15
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/数学/" itemprop="url" rel="index">
                    <span itemprop="name">数学</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/05/15/lr 2/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/05/15/lr 2/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  721
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  3
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <h2 id="二项-Logistic-Regression-模型推导"><a href="#二项-Logistic-Regression-模型推导" class="headerlink" title="二项 Logistic Regression 模型推导"></a>二项 <strong>Logistic Regression</strong> 模型推导</h2><h3 id="模型描述"><a href="#模型描述" class="headerlink" title="模型描述"></a>模型描述</h3><p>记 $\pi(x)=\frac{1}{1+e^{-x}}$，二项 <strong>Logistic Regression</strong> 模型是如下的条件概率分布：</p>
<script type="math/tex; mode=display">
\begin{cases}
P(y=1|\boldsymbol{x})=\pi(\boldsymbol{wx})=\frac{1}{1+e^{-\boldsymbol{wx}}}=\frac{e^{\boldsymbol{wx}}}{1+e^{\boldsymbol{wx}}} \\
P(y\not=1|\boldsymbol{x})=1-\pi(\boldsymbol{wx})=\frac{1}{1+e^{\boldsymbol{wx}}}
\end{cases}\qquad(1)</script><h3 id="模型求解（极大似然估计）"><a href="#模型求解（极大似然估计）" class="headerlink" title="模型求解（极大似然估计）"></a>模型求解（极大似然估计）</h3><p>常见的 label 设置有正负样本分别为 {1,0} 或 {1,-1}，下面分别讨论两种设置下的损失函数和梯度的推导。首先要假设训练样本独立同分布并且数量足够，模型中待估计的参数为 $\boldsymbol{w}$，似然函数的目标是 $y_i=1$ 时 $\pi(\boldsymbol{wx}_i)$ 尽可能大，且 $y_i\not =1$ 时 $1-\pi(\boldsymbol{wx}_i)$ 尽可能大。</p>
<h4 id="1-label-为-1-0"><a href="#1-label-为-1-0" class="headerlink" title="1. label 为 {1,0}"></a>1. label 为 {1,0}</h4><p>此时，可以直接将 $\hat{y}=\pi(\boldsymbol{wx})$ 的结果作为对 $y$ 值的预测（或者说是预测结果为 1 的概率）。根据<a href="https://guyuecanhui.github.io/2019/05/11/terminology/" target="_blank" rel="noopener">最大似然估计公式</a>，$p(\boldsymbol{x}_i|\boldsymbol{w})=\big(\pi(\boldsymbol{wx}_i)\big)^{y_i}\cdot\big(1-\pi(\boldsymbol{wx}_i)\big)^{1-y_i}$，对数似然函数可以设计为：</p>
<script type="math/tex; mode=display">
\begin{align}
H(\boldsymbol{w}) &=\arg \max_{\boldsymbol{w}}\sum_{i=1}^N \ln\Big(\big(\pi(\boldsymbol{wx}_i)\big)^{y_i}\cdot\big(1-\pi(\boldsymbol{wx}_i)\big)^{1-y_i}\Big) \\
&=\arg \max_{\boldsymbol{w}}\sum_{i=1}^N \Big(y_i\cdot \ln\big(\pi(\boldsymbol{wx}_i)\big)+(1-y_i)\cdot \ln\big(1-\pi(\boldsymbol{wx}_i)\big)\Big) \\
&=\arg \max_{\boldsymbol{w}}\sum_{i=1}^N \Big(y_i\cdot \ln(\frac{e^{\boldsymbol{wx}_i}}{1+e^{\boldsymbol{wx}_i}})+(1-y_i)\cdot \ln(\frac{1}{1+e^{\boldsymbol{wx}_i}})\Big) \\
&=\arg \max_{\boldsymbol{w}}\sum_{i=1}^N \Big(y_i\boldsymbol{wx}_i-\ln(1+e^{\boldsymbol{wx}_i})\Big)\qquad(2)
\end{align}</script><p>这里，$l_l(\boldsymbol{x},y)=-\big(y\cdot \ln(\hat{y})+(1-y)\cdot\ln(1-\hat{y})\big)=\ln(1+e^{\boldsymbol{wx}})-y\boldsymbol{wx}$ 记作样本 $(\boldsymbol{x},y)$ 的 <strong>LogLoss</strong>，后面会经常见到。</p>
<p>根据损失函数 $l_l(\boldsymbol{x},y)$，对每个维度上的参数分别求导：</p>
<script type="math/tex; mode=display">
\begin{align}
\frac{\partial l_l(\boldsymbol{x},y)}{\partial w_i}
&=\frac{1}{(1+e^{\boldsymbol{wx}})}\cdot e^{\boldsymbol{wx}}\cdot x_i-y\cdot x_i\\
&=\big(\pi(\boldsymbol{wx})-y\big)\cdot x_i\\
&=(\hat{y}-y)\cdot x_i\qquad(3)
\end{align}</script><h4 id="2-label-为-1-1"><a href="#2-label-为-1-1" class="headerlink" title="2. label 为 {1,-1}"></a>2. label 为 {1,-1}</h4><p>此时仍然可以认为 $\pi(\boldsymbol{wx})$ 输出了模型预测样本结果为 1 的概率，但是由于负样本的标签为 -1，因此考虑使用 $p(\boldsymbol{x}_i|\boldsymbol{w})=\frac{1}{1+e^{-y_i\boldsymbol{wx}_i}}$，则对数似然函数可以设计为：</p>
<script type="math/tex; mode=display">
\begin{align}
H(\boldsymbol{w}) &=\arg \max_{\boldsymbol{w}}\sum_{i=1}^N \ln(\frac{1}{1+e^{-y_i\boldsymbol{wx}_i}}) \\
&=\arg \min_{\boldsymbol{w}}\sum_{i=1}^N \ln(1+e^{-y_i\boldsymbol{wx}_i}) \qquad(4)\\
\end{align}</script><p>这里，$l_s(\boldsymbol{x},y)=\ln(1+e^{-y\boldsymbol{wx}})$ 称作样本 $(\boldsymbol{x},y)$ 的 <strong>SigmoidLoss</strong>，后面也会经常看到。</p>
<p>根据损失函数 $l_s(\boldsymbol{x},y)$，对每个维度上的参数分别求导：</p>
<script type="math/tex; mode=display">
\begin{align}
\frac{\partial l_s(\boldsymbol{x},y)}{\partial w_i}
&=\frac{1}{1+e^{-y\boldsymbol{wx}}}\cdot e^{-y\boldsymbol{wx}}\cdot (-y\cdot x_i) \\
&=y\cdot \big(\pi(y\boldsymbol{wx})-1\big)\cdot x_i\qquad(5)\\
\end{align}</script>
          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/05/15/lr/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/05/15/lr/" itemprop="url">二项 Logistic Regression 模型</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-05-15T22:02:00+08:00">
                2019-05-15
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/数学/" itemprop="url" rel="index">
                    <span itemprop="name">数学</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/05/15/lr/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/05/15/lr/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  729
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  3
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          <p>本文主要介绍一下二项 <strong>Logistic Regression</strong> 模型推导。</p>
          <!--noindex-->
          <div class="post-button text-center">
            <a class="btn" href="/hcigmoid/2019/05/15/lr/#more" rel="contents">
              阅读全文 &raquo;
            </a>
          </div>
          <!--/noindex-->
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/05/11/terminology/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/05/11/terminology/" itemprop="url">符号约定与常用公式</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-05-11T21:51:23+08:00">
                2019-05-11
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/数学/" itemprop="url" rel="index">
                    <span itemprop="name">数学</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/05/11/terminology/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/05/11/terminology/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  780
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  3
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          <h2 id="一、推荐系统常用符号含义"><a href="#一、推荐系统常用符号含义" class="headerlink" title="一、推荐系统常用符号含义"></a>一、推荐系统常用符号含义</h2><div class="table-container">
<table>
<thead>
<tr>
<th style="text-align:left">符号表示</th>
<th style="text-align:left">含义</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left">$\boldsymbol{x}$</td>
<td style="text-align:left">输入变量，一般为特征向量</td>
</tr>
<tr>
<td style="text-align:left">$\boldsymbol{x}_i=(x_i^{(1)}, \cdots, x_i^{(n)})^{\top}$</td>
<td style="text-align:left">第 $i$ 个输入变量的取值，在推导损失函数等场景下，由于每次只考虑一条样本，记样本为 $\boldsymbol{x}=(x_1,\cdots,x_n)$，此时 $x_i$ 表示样本的第 $i$ 维特征</td>
</tr>
<tr>
<td style="text-align:left"><script type="math/tex">\mathcal{X}=\{\boldsymbol{x}_1,\cdots,\boldsymbol{x}_N\}</script></td>
<td style="text-align:left">输入实例集合</td>
</tr>
<tr>
<td style="text-align:left"><script type="math/tex">(x_j^{(i)})^k</script></td>
<td style="text-align:left">第 $j$ 个输入变量的第 $i$ 维特征取值的 $k$ 次方</td>
</tr>
<tr>
<td style="text-align:left">$y$</td>
<td style="text-align:left">输出变量，一般为样本标签</td>
</tr>
<tr>
<td style="text-align:left">$y_i$</td>
<td style="text-align:left">第 $i$ 个输出变量的取值</td>
</tr>
<tr>
<td style="text-align:left">$\mathcal{Y}=\{y_1,\cdots,y_N\}$</td>
<td style="text-align:left">输出实例集合</td>
</tr>
<tr>
<td style="text-align:left">$(\boldsymbol{x}_i,y_i)$</td>
<td style="text-align:left">第 $i$ 个样本点</td>
</tr>
<tr>
<td style="text-align:left">$\mathcal{T}=\{(\boldsymbol{x}_1,y_1),\cdots,(\boldsymbol{x}_N,y_N)\}$</td>
<td style="text-align:left">训练数据集</td>
</tr>
<tr>
<td style="text-align:left">$\boldsymbol{w}=(w_1,\cdots,w_n)$</td>
<td style="text-align:left">权重向量</td>
</tr>
<tr>
<td style="text-align:left">$w_i^t$</td>
<td style="text-align:left">第 $i$ 维特征的权重在第 $t$ 轮迭代的取值</td>
</tr>
<tr>
<td style="text-align:left">$\parallel \boldsymbol{w} \parallel_i^j$</td>
<td style="text-align:left">权重向量 $\boldsymbol{w}$ 的 Li 范数的 $j$ 次方，例如 L1 范数：$\parallel \boldsymbol{w} \parallel_1$，L2 范数： $\parallel \boldsymbol{w} \parallel_2^2$</td>
</tr>
<tr>
<td style="text-align:left">$\boldsymbol{g}=(g_1,\cdots,g_n)$</td>
<td style="text-align:left">梯度向量</td>
</tr>
<tr>
<td style="text-align:left">$\psi(\boldsymbol{w})$</td>
<td style="text-align:left">正则化函数</td>
</tr>
</tbody>
</table>
</div>
          <!--noindex-->
          <div class="post-button text-center">
            <a class="btn" href="/hcigmoid/2019/05/11/terminology/#more" rel="contents">
              阅读全文 &raquo;
            </a>
          </div>
          <!--/noindex-->
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/05/11/terminology%202/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/05/11/terminology 2/" itemprop="url">符号约定与常用公式</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-05-11T21:51:23+08:00">
                2019-05-11
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/数学/" itemprop="url" rel="index">
                    <span itemprop="name">数学</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/05/11/terminology 2/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/05/11/terminology 2/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  780
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  3
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <h2 id="一、推荐系统常用符号含义"><a href="#一、推荐系统常用符号含义" class="headerlink" title="一、推荐系统常用符号含义"></a>一、推荐系统常用符号含义</h2><div class="table-container">
<table>
<thead>
<tr>
<th style="text-align:left">符号表示</th>
<th style="text-align:left">含义</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left">$\boldsymbol{x}$</td>
<td style="text-align:left">输入变量，一般为特征向量</td>
</tr>
<tr>
<td style="text-align:left">$\boldsymbol{x}_i=(x_i^{(1)}, \cdots, x_i^{(n)})^{\top}$</td>
<td style="text-align:left">第 $i$ 个输入变量的取值，在推导损失函数等场景下，由于每次只考虑一条样本，记样本为 $\boldsymbol{x}=(x_1,\cdots,x_n)$，此时 $x_i$ 表示样本的第 $i$ 维特征</td>
</tr>
<tr>
<td style="text-align:left"><script type="math/tex">\mathcal{X}=\{\boldsymbol{x}_1,\cdots,\boldsymbol{x}_N\}</script></td>
<td style="text-align:left">输入实例集合</td>
</tr>
<tr>
<td style="text-align:left"><script type="math/tex">(x_j^{(i)})^k</script></td>
<td style="text-align:left">第 $j$ 个输入变量的第 $i$ 维特征取值的 $k$ 次方</td>
</tr>
<tr>
<td style="text-align:left">$y$</td>
<td style="text-align:left">输出变量，一般为样本标签</td>
</tr>
<tr>
<td style="text-align:left">$y_i$</td>
<td style="text-align:left">第 $i$ 个输出变量的取值</td>
</tr>
<tr>
<td style="text-align:left">$\mathcal{Y}=\{y_1,\cdots,y_N\}$</td>
<td style="text-align:left">输出实例集合</td>
</tr>
<tr>
<td style="text-align:left">$(\boldsymbol{x}_i,y_i)$</td>
<td style="text-align:left">第 $i$ 个样本点</td>
</tr>
<tr>
<td style="text-align:left">$\mathcal{T}=\{(\boldsymbol{x}_1,y_1),\cdots,(\boldsymbol{x}_N,y_N)\}$</td>
<td style="text-align:left">训练数据集</td>
</tr>
<tr>
<td style="text-align:left">$\boldsymbol{w}=(w_1,\cdots,w_n)$</td>
<td style="text-align:left">权重向量</td>
</tr>
<tr>
<td style="text-align:left">$w_i^t$</td>
<td style="text-align:left">第 $i$ 维特征的权重在第 $t$ 轮迭代的取值</td>
</tr>
<tr>
<td style="text-align:left">$\parallel \boldsymbol{w} \parallel_i^j$</td>
<td style="text-align:left">权重向量 $\boldsymbol{w}$ 的 Li 范数的 $j$ 次方，例如 L1 范数：$\parallel \boldsymbol{w} \parallel_1$，L2 范数： $\parallel \boldsymbol{w} \parallel_2^2$</td>
</tr>
<tr>
<td style="text-align:left">$\boldsymbol{g}=(g_1,\cdots,g_n)$</td>
<td style="text-align:left">梯度向量</td>
</tr>
<tr>
<td style="text-align:left">$\psi(\boldsymbol{w})$</td>
<td style="text-align:left">正则化函数</td>
</tr>
</tbody>
</table>
</div>
<h2 id="二、常用定理"><a href="#二、常用定理" class="headerlink" title="二、常用定理"></a>二、常用定理</h2><h3 id="中心极限定理"><a href="#中心极限定理" class="headerlink" title="中心极限定理"></a>中心极限定理</h3><ol>
<li>样本的平均值约等于总体的平均值。</li>
<li>给定一个任意分布的总体，从中随机抽取 $N$ 个样本，抽取 $k$ 次，这 $k$ 组抽样平均值的分布接近正态分布。</li>
<li>经验表明，当每组抽样数量 $N\ge 30$ 时就服从中心极限定理。</li>
</ol>
<h3 id="极大似然估计"><a href="#极大似然估计" class="headerlink" title="极大似然估计"></a>极大似然估计</h3><p><strong>前提假设：</strong>训练样本的分布能代表样本的真实分布；每个样本集中的样本都是所谓独立同分布的随机变量，且有充分的训练样本。</p>
<p><strong>最大似然估计的目的是：</strong>利用已知的样本结果，反推最有可能（最大概率）导致这样结果的参数值。换句话说，极大似然估计提供了一种给定观察数据来评估模型参数的方法，即：<strong>模型已定，参数未知</strong>。</p>
<p>ML估计的求解方法：</p>
<script type="math/tex; mode=display">
\hat{\theta} = \arg \max_{\theta} l(\theta) = \arg \max_{\theta}\prod_{i=1}^N p(\boldsymbol{x}_i|\theta)</script><p>为了便于分析，定义对数似然函数 $H(\theta) = \ln l(\theta)$，则：</p>
<script type="math/tex; mode=display">
\hat{\theta} = \arg \max_{\theta} \ln l(\theta) = \arg \max_{\theta}\sum_{i=1}^N \ln p(\boldsymbol{x}_i|\theta)</script><p>当 $H(\theta)$ 连续可微的情况下，可以通过求导（单个未知参数）或者求梯度（多个未知参数）的方式求解方程。</p>
<h2 id="三、常用的函数和公式"><a href="#三、常用的函数和公式" class="headerlink" title="三、常用的函数和公式"></a>三、常用的函数和公式</h2><h3 id="Sigmoid-函数"><a href="#Sigmoid-函数" class="headerlink" title="Sigmoid 函数"></a>Sigmoid 函数</h3><ul>
<li>表达式：$\pi(x)=\frac{1}{1+e^{-x}}=\frac{e^x}{1+e^x}$；</li>
<li>导数：$\pi'(x)=\pi(x)\big(1-\pi(x)\big)$；</li>
</ul>
<h3 id="LogLoss"><a href="#LogLoss" class="headerlink" title="LogLoss"></a>LogLoss</h3><ul>
<li>样本 $(\boldsymbol{x},y)$ 的 <strong>LogLoss</strong> 表达式：$l_{l}(\boldsymbol{x},y)=\ln(1+e^{f(\boldsymbol{x})})-yf(\boldsymbol{x})$</li>
<li>导数：$l_l'(\boldsymbol{x},y)=\Big(\pi\big(f(\boldsymbol{x})\big)-y\Big)\cdot f'(\boldsymbol{x})$</li>
<li>使用极大似然估计，标签值为 {0,1}，推导参考 <a href="https://guyuecanhui.github.io/2019/05/15/lr/" target="_blank" rel="noopener">LR 模型</a></li>
</ul>
<h3 id="SigmoidLoss"><a href="#SigmoidLoss" class="headerlink" title="SigmoidLoss"></a>SigmoidLoss</h3><ul>
<li>样本 $(\boldsymbol{x},y)$ 的 <strong>SigmoidLoss</strong> 表达式：$l_s(\boldsymbol{x},y)=\ln(1+e^{-yf(\boldsymbol{x})})$</li>
<li>导数：$l_s'(\boldsymbol{x},y)=y\Big(\pi\big(y\cdot f(\boldsymbol{x})\big)-1\Big)\cdot f'(\boldsymbol{x})$</li>
<li>使用极大似然估计，标签值为 {-1,1}，推导参考 <a href="https://guyuecanhui.github.io/2019/05/15/lr/" target="_blank" rel="noopener">LR 模型</a></li>
</ul>

          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
  </section>

  
  <nav class="pagination">
    <a class="extend prev" rel="prev" href="/hcigmoid/page/3/"><i class="fa fa-angle-left"></i></a><a class="page-number" href="/hcigmoid/">1</a><span class="space">&hellip;</span><a class="page-number" href="/hcigmoid/page/3/">3</a><span class="page-number current">4</span><a class="page-number" href="/hcigmoid/page/5/">5</a><span class="space">&hellip;</span><a class="page-number" href="/hcigmoid/page/7/">7</a><a class="extend next" rel="next" href="/hcigmoid/page/5/"><i class="fa fa-angle-right"></i></a>
  </nav>



          </div>
          


          

        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    
    <div class="sidebar-inner">

      

      

      <section class="site-overview-wrap sidebar-panel sidebar-panel-active">
        <div class="site-overview">
          <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
            
              <p class="site-author-name" itemprop="name">古月残辉</p>
              <p class="site-description motion-element" itemprop="description">总结心得</p>
          </div>

          <nav class="site-state motion-element">

            
              <div class="site-state-item site-state-posts">
              
                <a href="/hcigmoid/archives/">
              
                  <span class="site-state-item-count">62</span>
                  <span class="site-state-item-name">日志</span>
                </a>
              </div>
            

            
              
              
              <div class="site-state-item site-state-categories">
                <a href="/hcigmoid/categories/index.html">
                  <span class="site-state-item-count">6</span>
                  <span class="site-state-item-name">分类</span>
                </a>
              </div>
            

            
              
              
              <div class="site-state-item site-state-tags">
                <a href="/hcigmoid/tags/index.html">
                  <span class="site-state-item-count">70</span>
                  <span class="site-state-item-name">标签</span>
                </a>
              </div>
            

          </nav>

          
            <div class="feed-link motion-element">
              <a href="/hcigmoid/atom.xml" rel="alternate">
                <i class="fa fa-rss"></i>
                RSS
              </a>
            </div>
          

          
            <div class="links-of-author motion-element">
                
                  <span class="links-of-author-item">
                    <a href="mailto:guyuecanhui@icloud.com" target="_blank" title="E-Mail">
                      
                        <i class="fa fa-fw fa-envelope"></i>E-Mail</a>
                  </span>
                
            </div>
          

          
          

          
          

          

        </div>
      </section>

      

      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <div class="copyright">&copy; 2018 &mdash; <span itemprop="copyrightYear">2020</span>
  <span class="with-love">
    <i class="fa fa-user"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">古月残辉</span>

  
    <span class="post-meta-divider">|</span>
    <span class="post-meta-item-icon">
      <i class="fa fa-area-chart"></i>
    </span>
    
      <span class="post-meta-item-text">Site words total count&#58;</span>
    
    <span title="Site words total count">105.3k</span>
  
</div>









<script async src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script>

        
<div class="busuanzi-count">
  <script async src="https://dn-lbstatics.qbox.me/busuanzi/2.3/busuanzi.pure.mini.js"></script>

  
    <span class="site-uv">
      <i class="fa fa-user"></i> 访问人数
      <span class="busuanzi-value" id="busuanzi_value_site_uv"></span>
      人
    </span>
  

  
    <span class="site-pv">
      <i class="fa fa-eye"></i> 总访问量
      <span class="busuanzi-value" id="busuanzi_value_site_pv"></span>
      次
    </span>
  
</div>








        
      </div>
    </footer>

    
      <div class="back-to-top">
        <i class="fa fa-arrow-up"></i>
        
      </div>
    

    

  </div>

  

<script type="text/javascript">
  // Null out window.Promise unless its internal class tag is exactly
  // '[object Function]'.
  // NOTE(review): presumably this makes later scripts that feature-test
  // Promise treat a non-function value as "absent" — confirm against the
  // libraries loaded below.
  (function (global) {
    var promiseTag = Object.prototype.toString.call(global.Promise);
    if (promiseTag === '[object Function]') {
      return;
    }
    global.Promise = null;
  })(window);
</script>









  












  
  
    <script type="text/javascript" src="/hcigmoid/lib/jquery/index.js?v=2.1.3"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/velocity/velocity.min.js?v=1.2.1"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>
  


  


  <script type="text/javascript" src="/hcigmoid/js/src/utils.js?v=5.1.4"></script>

  <script type="text/javascript" src="/hcigmoid/js/src/motion.js?v=5.1.4"></script>



  
  


  <script type="text/javascript" src="/hcigmoid/js/src/affix.js?v=5.1.4"></script>

  <script type="text/javascript" src="/hcigmoid/js/src/schemes/pisces.js?v=5.1.4"></script>



  

  


  <script type="text/javascript" src="/hcigmoid/js/src/bootstrap.js?v=5.1.4"></script>



  


  




	





  





  










  <script src="//cdn1.lncld.net/static/js/3.0.4/av-min.js"></script>
  <script src="//unpkg.com/valine/dist/Valine.min.js"></script>
  
  <script type="text/javascript">
    // Whitelist of commenter-info fields accepted on this page.
    var GUEST = ['nick','mail','link'];
    // Split the configured comma-separated field list and keep only the
    // entries that appear in the whitelist above.
    var guest = 'nick,mail,link'.split(',').filter(function (field) {
      return GUEST.indexOf(field) !== -1;
    });
    // Instantiate the Valine comment widget against the '#comments' element.
    new Valine({
        el: '#comments',
        verify: false,
        notify: false,
        appId: '6du4Ppc2TvUuhcccRHSDNH2v-gzGzoHsz',
        appKey: 'zOKNml4W1Bq3OTzEuLt5hUjI',
        placeholder: '感谢阅读！欢迎评论！',
        avatar: 'mm',
        guest_info: guest,
        // The left operand is a non-empty string, so this always yields '10'.
        pageSize: '10' || 10,
    });
  </script>



  

  <script type="text/javascript">
    // State shared with the local-search popup code below.
    // isfetched: flipped to true once the search index has been loaded.
    var isfetched = false;
    // Location of the search index, relative to the site root.
    var search_path = "search.xml";
    if (!search_path.length) {
      // An empty setting falls back to the default XML index.
      search_path = "search.xml";
    }
    // The index is parsed as XML unless its path ends in "json" (case-insensitive).
    var isXml = !/json$/i.test(search_path);
    // Root-relative URL of the search index.
    var path = "/hcigmoid/" + search_path;
    // monitor main search box;

    /**
     * Close the local-search popup and reset its state.
     *
     * Hides the popup, empties the search input, removes the result list,
     * the "no result" notice and the overlay, and clears the inline
     * `overflow` style on <body>.
     *
     * @param {Event} e - event object passed by the click binding (unused).
     */
    var onPopupClose = function (e) {
      $('.popup').hide();
      $('#local-search-input').val('');
      // Drop every transient search element in a single pass.
      $('.search-result-list, #no-result, .local-search-pop-overlay').remove();
      $('body').css('overflow', '');
    }

    /**
     * Open the local-search popup.
     *
     * Appends a click-to-close overlay to <body> and sets its overflow to
     * hidden, toggles the popup's visibility, then configures and focuses
     * the search input.
     */
    function proceedsearch() {
      var $body = $("body");
      $body.append('<div class="search-popup-overlay local-search-pop-overlay"></div>');
      $body.css('overflow', 'hidden');

      // Clicking any overlay dismisses the popup.
      $('.search-popup-overlay').click(onPopupClose);
      $('.popup').toggle();

      // Turn off mobile auto-capitalisation/auto-correction, then focus the field.
      $('#local-search-input')
        .attr({ autocapitalize: "none", autocorrect: "off" })
        .focus();
    }

    // search function;
    var searchFunc = function(path, search_id, content_id) {
      'use strict';

      // start loading animation
      $("body")
        .append('<div class="search-popup-overlay local-search-pop-overlay">' +
          '<div id="search-loading-icon">' +
          '<i class="fa fa-spinner fa-pulse fa-5x fa-fw"></i>' +
          '</div>' +
          '</div>')
        .css('overflow', 'hidden');
      $("#search-loading-icon").css('margin', '20% auto 0 auto').css('text-align', 'center');

      $.ajax({
        url: path,
        dataType: isXml ? "xml" : "json",
        async: true,
        success: function(res) {
          // get the contents from search data
          isfetched = true;
          $('.popup').detach().appendTo('.header-inner');
          var datas = isXml ? $("entry", res).map(function() {
            return {
              title: $("title", this).text(),
              content: $("content",this).text(),
              url: $("url" , this).text()
            };
          }).get() : res;
          var input = document.getElementById(search_id);
          var resultContent = document.getElementById(content_id);
          var inputEventFunction = function() {
            var searchText = input.value.trim().toLowerCase();
            var keywords = searchText.split(/[\s\-]+/);
            if (keywords.length > 1) {
              keywords.push(searchText);
            }
            var resultItems = [];
            if (searchText.length > 0) {
              // perform local searching
              datas.forEach(function(data) {
                var isMatch = false;
                var hitCount = 0;
                var searchTextCount = 0;
                var title = data.title.trim();
                var titleInLowerCase = title.toLowerCase();
                var content = data.content.trim().replace(/<[^>]+>/g,"");
                var contentInLowerCase = content.toLowerCase();
                var articleUrl = decodeURIComponent(data.url);
                var indexOfTitle = [];
                var indexOfContent = [];
                // only match articles with not empty titles
                if(title != '') {
                  keywords.forEach(function(keyword) {
                    function getIndexByWord(word, text, caseSensitive) {
                      var wordLen = word.length;
                      if (wordLen === 0) {
                        return [];
                      }
                      var startPosition = 0, position = [], index = [];
                      if (!caseSensitive) {
                        text = text.toLowerCase();
                        word = word.toLowerCase();
                      }
                      while ((position = text.indexOf(word, startPosition)) > -1) {
                        index.push({position: position, word: word});
                        startPosition = position + wordLen;
                      }
                      return index;
                    }

                    indexOfTitle = indexOfTitle.concat(getIndexByWord(keyword, titleInLowerCase, false));
                    indexOfContent = indexOfContent.concat(getIndexByWord(keyword, contentInLowerCase, false));
                  });
                  if (indexOfTitle.length > 0 || indexOfContent.length > 0) {
                    isMatch = true;
                    hitCount = indexOfTitle.length + indexOfContent.length;
                  }
                }

                // show search results

                if (isMatch) {
                  // sort index by position of keyword

                  [indexOfTitle, indexOfContent].forEach(function (index) {
                    index.sort(function (itemLeft, itemRight) {
                      if (itemRight.position !== itemLeft.position) {
                        return itemRight.position - itemLeft.position;
                      } else {
                        return itemLeft.word.length - itemRight.word.length;
                      }
                    });
                  });

                  // merge hits into slices

                  /**
                   * Consume pending hits from the tail of `index` that fit inside the
                   * window ending at `end` and bundle them into one slice.
                   *
                   * `index` is treated as a stack: the hit to handle next is its last
                   * element. A hit that starts before the end of the previously accepted
                   * hit is discarded as overlapping.
                   *
                   * The caller keeps calling this function until `index` is empty, so
                   * every call on a non-empty `index` must remove at least one entry.
                   * If the first pending hit does not fit in the window (possible when
                   * positions were computed on lowercased text whose length differs from
                   * the text the window was clamped to), it is dropped without being
                   * recorded so that the caller still makes progress.
                   *
                   * Side effect: adds the number of accepted hits whose word equals the
                   * outer `searchText` to the outer `searchTextCount`.
                   *
                   * @param {string} text - Unused here; kept for the existing call sites.
                   * @param {number} start - Slice start offset, copied into the result.
                   * @param {number} end - Slice end offset; accepted hits end at or before it.
                   * @param {Array<{position: number, word: string}>} index - Pending hits; mutated.
                   * @returns {{hits: Array<{position: number, length: number}>, start: number,
                   *   end: number, searchTextCount: number}} The merged slice.
                   */
                  function mergeIntoSlice(text, start, end, index) {
                    var hits = [];
                    var searchTextCountInSlice = 0;

                    while (index.length !== 0) {
                      var item = index[index.length - 1];
                      var wordEnd = item.position + item.word.length;
                      if (wordEnd > end) {
                        // next hit reaches past the window; leave it for a later slice
                        break;
                      }
                      if (item.word === searchText) {
                        searchTextCountInSlice++;
                      }
                      hits.push({position: item.position, length: item.word.length});
                      index.pop();

                      // discard hits that overlap the one just accepted
                      while (index.length !== 0 && index[index.length - 1].position < wordEnd) {
                        index.pop();
                      }
                    }

                    // Guarantee progress: nothing fitted, so drop the offending hit
                    // instead of leaving it for the caller to retry forever.
                    if (hits.length === 0 && index.length !== 0) {
                      index.pop();
                    }

                    searchTextCount += searchTextCountInSlice;
                    return {
                      hits: hits,
                      start: start,
                      end: end,
                      searchTextCount: searchTextCountInSlice
                    };
                  }

                  var slicesOfTitle = [];
                  if (indexOfTitle.length != 0) {
                    slicesOfTitle.push(mergeIntoSlice(title, 0, title.length, indexOfTitle));
                  }

                  var slicesOfContent = [];
                  while (indexOfContent.length != 0) {
                    var item = indexOfContent[indexOfContent.length - 1];
                    var position = item.position;
                    var word = item.word;
                    // cut out 100 characters
                    var start = position - 20;
                    var end = position + 80;
                    if(start < 0){
                      start = 0;
                    }
                    if (end < position + word.length) {
                      end = position + word.length;
                    }
                    if(end > content.length){
                      end = content.length;
                    }
                    slicesOfContent.push(mergeIntoSlice(content, start, end, indexOfContent));
                  }

                  // sort slices in content by search text's count and hits' count

                  slicesOfContent.sort(function (sliceLeft, sliceRight) {
                    if (sliceLeft.searchTextCount !== sliceRight.searchTextCount) {
                      return sliceRight.searchTextCount - sliceLeft.searchTextCount;
                    } else if (sliceLeft.hits.length !== sliceRight.hits.length) {
                      return sliceRight.hits.length - sliceLeft.hits.length;
                    } else {
                      return sliceLeft.start - sliceRight.start;
                    }
                  });

                  // select top N slices in content

                  var upperBound = parseInt('1');
                  if (upperBound >= 0) {
                    slicesOfContent = slicesOfContent.slice(0, upperBound);
                  }

                  // highlight title and content

                  /**
                   * Render the part of `text` covered by `slice`, wrapping each hit in
                   * `<b class="search-keyword">…</b>`.
                   *
                   * @param {string} text - Full text the slice offsets refer to.
                   * @param {{start: number, end: number, hits: Array<{position: number, length: number}>}} slice
                   *   Window to render; hits are emitted in the order they appear in `slice.hits`.
                   * @returns {string} Markup for the window; `text` itself is inserted as-is.
                   */
                  function highlightKeyword(text, slice) {
                    var cursor = slice.start;
                    var pieces = slice.hits.map(function (hit) {
                      var hitEnd = hit.position + hit.length;
                      // plain text leading up to the hit, then the wrapped hit itself
                      var piece = text.substring(cursor, hit.position) +
                        '<b class="search-keyword">' + text.substring(hit.position, hitEnd) + '</b>';
                      cursor = hitEnd;
                      return piece;
                    });
                    // trailing text between the last hit and the end of the window
                    pieces.push(text.substring(cursor, slice.end));
                    return pieces.join('');
                  }

                  var resultItem = '';

                  if (slicesOfTitle.length != 0) {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + highlightKeyword(title, slicesOfTitle[0]) + "</a>";
                  } else {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + title + "</a>";
                  }

                  slicesOfContent.forEach(function (slice) {
                    resultItem += "<a href='" + articleUrl + "'>" +
                      "<p class=\"search-result\">" + highlightKeyword(content, slice) +
                      "...</p>" + "</a>";
                  });

                  resultItem += "</li>";
                  resultItems.push({
                    item: resultItem,
                    searchTextCount: searchTextCount,
                    hitCount: hitCount,
                    id: resultItems.length
                  });
                }
              })
            };
            if (keywords.length === 1 && keywords[0] === "") {
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-search fa-5x" /></div>'
            } else if (resultItems.length === 0) {
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-frown-o fa-5x" /></div>'
            } else {
              resultItems.sort(function (resultLeft, resultRight) {
                if (resultLeft.searchTextCount !== resultRight.searchTextCount) {
                  return resultRight.searchTextCount - resultLeft.searchTextCount;
                } else if (resultLeft.hitCount !== resultRight.hitCount) {
                  return resultRight.hitCount - resultLeft.hitCount;
                } else {
                  return resultRight.id - resultLeft.id;
                }
              });
              var searchResultList = '<ul class=\"search-result-list\">';
              resultItems.forEach(function (result) {
                searchResultList += result.item;
              })
              searchResultList += "</ul>";
              resultContent.innerHTML = searchResultList;
            }
          }

          if ('auto' === 'auto') {
            input.addEventListener('input', inputEventFunction);
          } else {
            $('.search-icon').click(inputEventFunction);
            input.addEventListener('keypress', function (event) {
              if (event.keyCode === 13) {
                inputEventFunction();
              }
            });
          }

          // remove loading animation
          $(".local-search-pop-overlay").remove();
          $('body').css('overflow', '');

          proceedsearch();
        }
      });
    }

    // Popup trigger: when `isfetched` is still false, start searchFunc;
    // otherwise go straight to proceedsearch().
    $('.popup-trigger').click(function (clickEvent) {
      clickEvent.stopPropagation();
      if (isfetched !== false) {
        proceedsearch();
        return;
      }
      searchFunc(path, 'local-search-input', 'local-search-result');
    });

    // The close button dismisses the popup.
    $('.popup-btn-close').click(onPopupClose);

    // Clicks inside the popup are not propagated to ancestor elements.
    $('.popup').click(function (clickEvent) {
      clickEvent.stopPropagation();
    });

    // Key code 27 (Escape) closes the search popup, but only while it is visible.
    $(document).on('keyup', function (keyEvent) {
      if (keyEvent.which !== 27) {
        return;
      }
      if ($('.search-popup').is(':visible')) {
        onPopupClose();
      }
    });
  </script>





  

  

  

  
  

  
  
    <script type="text/x-mathjax-config">
      // MathJax configuration for this page; only the tex2jax section is set.
      MathJax.Hub.Config({
        tex2jax: {
          // Inline math is delimited by $...$ or \(...\).
          inlineMath: [ ['$','$'], ["\\(","\\)"]  ],
          // NOTE(review): per the MathJax 2 tex2jax docs this should let "\$" stand
          // for a literal dollar sign — confirm against the MathJax version in use.
          processEscapes: true,
          // Tag names passed to tex2jax as ones to skip.
          skipTags: ['script', 'noscript', 'style', 'textarea', 'pre', 'code']
        }
      });
    </script>

    <script type="text/x-mathjax-config">
      // Queued callback: append the ' has-jax' class to the parent node of the
      // source element of every jax returned by getAllJax().
      MathJax.Hub.Queue(function() {
        var jaxList = MathJax.Hub.getAllJax();
        var idx = 0;
        while (idx < jaxList.length) {
          var host = jaxList[idx].SourceElement().parentNode;
          host.className += ' has-jax';
          idx += 1;
        }
      });
    </script>
    <!-- MathJax 2.7.1 loader. The URL is pinned to https: because a protocol-relative
         URL resolves to file:// when the page is opened locally and to plain http on
         http pages. -->
    <script src="https://cdn.bootcss.com/mathjax/2.7.1/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
  


  

  

</body>
</html>
